## Loading required package: ggplot2
## Loading required package: grid
## Loading required package: gridExtra
## Loading required package: reshape2
## Loading required package: ROCR
## Loading required package: gplots
##
## Attaching package: 'gplots'
##
## The following object is masked from 'package:stats':
##
## lowess
##
## Loading required package: plyr
## Loading required package: stringr
## Loading required package: gsubfn
## Loading required package: proto
## Loading required package: RSQLite
## Loading required package: DBI
# Outcome columns carried by the generated example data; yName is the one
# that every model below is asked to predict.
yVars <- c("yCat", "yNumeric")
yName <- "yNumeric"

set.seed(232567)

# Variable plans: vplan1 holds only the x1 design; vplan2 reuses that very
# same x1 design object and appends the n1 design built by designNoiseVar.
# NOTE(review): the numeric arguments (10, 500) are presumably level counts --
# confirm against designVar / designNoiseVar.
vplan1 <- list(designVar("x1", 10))
vplan2 <- c(vplan1[1], list(designNoiseVar("n1", 500)))

# Every experiment pairs one variable plan with one fitter and a display name.
experiments <- list(
  list(
    vplan = vplan1,
    fnFitter = lrFitter,
    eName = "one variable, linear regression"
  ),
  list(
    vplan = vplan2,
    fnFitter = lrFitter,
    eName = "one variable plus noise variable, linear regression"
  ),
  list(
    vplan = vplan1,
    fnFitter = dFitter,
    eName = "one variable, diagonal regression"
  ),
  list(
    vplan = vplan2,
    fnFitter = dFitter,
    eName = "one variable plus noise variable, diagonal regression"
  )
)
# Run every configured experiment in turn. Each pass unpacks the experiment's
# settings, announces it, and draws three independent data sets from the same
# variable plan.
for(expmt in experiments) {
# Re-seed on every pass so each experiment starts from the same RNG state
set.seed(232567)
vplan <- expmt$vplan
fnFitter <- expmt$fnFitter
eName <- expmt$eName
print("*************************************************************")
print(eName)
dTrain <- generateExample(vplan,2000) # Training set
# Model inputs are all generated columns except the outcome columns
vars <- setdiff(colnames(dTrain),yVars)
dCal <- generateExample(vplan,10000) # Used to pick sigma
dTest <- generateExample(vplan,10000) # Pure holdout test
# Pick the noise level (sigma) to use with the effect coder.
#
# Each candidate sigma in 0..40 is scored by training an effect coder on
# dTrain three times, fitting fnFitter on the coded training frame, and
# measuring rmse() of the predictions on the coded calibration frame; the
# three scores are averaged. The candidate with the strictly lowest average
# is returned (the earliest candidate wins ties; 0 is returned when no score
# is below Inf).
#
#   cl      cluster handed to parallel::parLapplyLB, or NULL to run serially
#   yName   name of the outcome column being predicted
#   yVars   names of all outcome columns (excluded from the model inputs)
#   dTrain  data frame used to train the coder and the fitter
#   vars    input column names passed to trainEffectCoderR
#   dCal    data frame on which the candidates are scored
findSigma <- function(cl,
                      yName,
                      yVars,
                      dTrain,
                      vars,
                      dCal) {
  # Builds the per-sigma scoring closure. The arguments are forced and the
  # helper functions are bound into this frame so the closure carries what it
  # needs when it is evaluated by the cluster.
  mkWorker1 <- function(yName,
                        yVars,
                        dTrain,
                        vars,
                        dCal) {
    force(yName)
    force(yVars)
    force(dTrain)
    force(vars)
    force(dCal)
    bindToEnv(objNames=sourcedFns,
              fnFitter)
    function(sigma) {
      # The coder is retrained for each of the three trials and the
      # calibration RMSEs are averaged.
      trialScores <- vapply(seq_len(3), function(trial) {
        coder <- trainEffectCoderR(dTrain, yName, vars, sigma)
        codedTrain <- coder$codeFrameR(dTrain)
        codedCal <- coder$codeFrameR(dCal)
        codedVars <- setdiff(colnames(codedTrain), yVars)
        fit <- fnFitter(yName, codedVars, codedTrain, codedCal)
        codedCal$pred <- fit$appPred
        rmse(codedCal$pred, codedCal[[yName]])
      }, numeric(1))
      list(scoreB=mean(trialScores), sigma=sigma)
    }
  }

  candidateSigmas <- seq_len(41) - 1
  worker <- mkWorker1(yName,
                      yVars,
                      dTrain,
                      vars,
                      dCal)

  # Score every candidate, on the cluster when one is supplied
  results <- if (is.null(cl)) {
    lapply(candidateSigmas, worker)
  } else {
    parallel::parLapplyLB(cl, candidateSigmas, worker)
  }

  # Fold down to the best result; the strict '<' keeps the earliest candidate
  # on ties and leaves the (Inf, 0) starting point in place if nothing beats it
  startingPoint <- list(scoreB=Inf, sigma=0)
  winner <- Reduce(function(best, candidate) {
    if (candidate$scoreB < best$scoreB) candidate else best
  }, results, startingPoint)
  winner$sigma
}
# Choose sigma on the calibration set, using the cluster handle cl
bSigmaBest <- findSigma(cl, yName, yVars, dTrain, vars, dCal)
print(paste('bSigmaBest',bSigmaBest))

# Prints the RMSE of frame$pred against the outcome column under rmseLabel,
# then prints the pred-versus-outcome ScatterHist with the given title.
showFit <- function(frame, rmseLabel, plotTitle) {
  print(paste(rmseLabel, rmse(frame$pred, frame[[yName]])))
  print(WVPlots::ScatterHist(frame, 'pred', yName,
                             plotTitle,
                             smoothmethod='lm', annot_size=2))
  invisible(NULL)
}

# --- Model 1: effect coder trained with sigma = 0 -------------------------
print('naive effects model')
bCoder <- trainEffectCoderR(dTrain, yName, vars, 0)
dTrainB <- bCoder$codeFrameR(dTrain)
dTestB <- bCoder$codeFrameR(dTest)
varsB <- setdiff(colnames(dTrainB), yVars)
preds <- fnFitter(yName, varsB, dTrainB, dTestB, verbose=TRUE)
dTrainB$pred <- preds$trainPred
showFit(dTrainB, 'train rmse',
        paste(eName,
              'naive effects model train',
              sep='\n'))
dTestB$pred <- preds$appPred
showFit(dTestB, 'test rmse',
        paste(eName,
              'naive effects model test',
              sep='\n'))

# --- Model 2: effect coder trained with the selected sigma ----------------
print(paste('effects model, sigma=',bSigmaBest))
bCoder <- trainEffectCoderR(dTrain, yName, vars, bSigmaBest)
dTrainB <- bCoder$codeFrameR(dTrain)
dTestB <- bCoder$codeFrameR(dTest)
varsB <- setdiff(colnames(dTrainB), yVars)
preds <- fnFitter(yName, varsB, dTrainB, dTestB, verbose=TRUE)
dTrainB$pred <- preds$trainPred
showFit(dTrainB, 'train rmse',
        paste(eName,
              '\neffects model train, sigma=',
              bSigmaBest))
dTestB$pred <- preds$appPred
showFit(dTestB, 'test rmse',
        paste(eName,
              '\neffects model test, sigma=',
              bSigmaBest))

# --- Model 3: jackknifed effect codes --------------------------------------
# The training frame comes from jackknifeEffectCodeR, while the test frame is
# coded by a coder trained with sigma = 0.
print('effects model, jacknifed')
bCoder <- trainEffectCoderR(dTrain, yName, vars, 0)
dTrainB <- jackknifeEffectCodeR(dTrain, yName, vars)
dTestB <- bCoder$codeFrameR(dTest)
varsB <- setdiff(colnames(dTrainB), yVars)
preds <- fnFitter(yName, varsB, dTrainB, dTestB, verbose=TRUE)
dTrainB$pred <- preds$trainPred
showFit(dTrainB, 'train rmse',
        paste(eName,
              'effects model train, jackknifed',
              sep='\n'))
dTestB$pred <- preds$appPred
showFit(dTestB, 'test rmse',
        paste(eName,
              'effects model test, jackknifed',
              sep='\n'))
# Build a function that runs one complete replicate of the experiment.
#
#   vplan  variable plan handed to generateExample
#
# The returned function takes a replicate id, draws fresh train / calibration
# / test data, fits the naive (sigma = 0), sigma-noised and jackknifed
# variants with fnFitter, and returns a three-row data frame with columns
# repID, bSigmaBest, what, trainRMSE and testRMSE.
mkExpmtRunner <- function(vplan) {
  force(vplan)
  # Bind the helper functions into this frame so the returned closure carries
  # them along when it is evaluated by the cluster
  bindToEnv(objNames=sourcedFns,
            findSigma,
            sourcedFns,
            fnFitter)
  function(repID) {
    yVars <- c('yCat','yNumeric')
    yName <- 'yNumeric'

    # Fit on an already-coded train/test pair and package the two RMSEs as a
    # single result row tagged with the model name and the sigma used.
    scoreModel <- function(codedTrain, codedTest, what, sigma) {
      codedVars <- setdiff(colnames(codedTrain), yVars)
      fit <- fnFitter(yName, codedVars, codedTrain, codedTest)
      codedTrain$pred <- fit$trainPred
      trainRMSE <- rmse(codedTrain$pred, codedTrain[[yName]])
      codedTest$pred <- fit$appPred
      testRMSE <- rmse(codedTest$pred, codedTest[[yName]])
      data.frame(repID=repID,
                 bSigmaBest=sigma,
                 what=what,
                 trainRMSE=trainRMSE,
                 testRMSE=testRMSE,
                 stringsAsFactors = FALSE)
    }

    # Fresh data for this replicate
    dTrain <- generateExample(vplan,2000)   # Training set
    vars <- setdiff(colnames(dTrain),yVars)
    dCal <- generateExample(vplan,10000)    # Used to pick sigma
    dTest <- generateExample(vplan,10000)   # Pure holdout test

    # Naive model: coder trained with sigma = 0
    naiveCoder <- trainEffectCoderR(dTrain,yName,vars,0)
    naiveTrain <- naiveCoder$codeFrameR(dTrain)
    naiveTest <- naiveCoder$codeFrameR(dTest)
    naiveRow <- scoreModel(naiveTrain, naiveTest, 'NaiveModel', NA)

    # Noised model: sigma chosen on the calibration set, evaluated serially
    bSigmaBest <- findSigma(NULL,
                            yName,
                            yVars,
                            dTrain,
                            vars,
                            dCal)
    noisedCoder <- trainEffectCoderR(dTrain,yName,vars,bSigmaBest)
    noisedTrain <- noisedCoder$codeFrameR(dTrain)
    noisedTest <- noisedCoder$codeFrameR(dTest)
    noisedRow <- scoreModel(noisedTrain, noisedTest, 'NoisedModel', bSigmaBest)

    # Jackknifed model: jackknife-coded training frame, sigma = 0 coder for test
    jackCoder <- trainEffectCoderR(dTrain,yName,vars,0)
    jackTrain <- jackknifeEffectCodeR(dTrain,yName,vars)
    jackTest <- jackCoder$codeFrameR(dTest)
    jackRow <- scoreModel(jackTrain, jackTest, 'JackknifeModel', NA)

    rbind(naiveRow, noisedRow, jackRow)
  }
}
# Repeat the whole experiment 200 times (fresh data on every replicate) and
# compare the naive, noised and jackknifed models on holdout RMSE.
eworker <- mkExpmtRunner(vplan)
repIDs <- seq_len(200)
# NOTE(review): no clusterSetRNGStream() call is visible in this file, so the
# workers' random streams may not be tied to the set.seed() above -- confirm
# whether replicate results are meant to be reproducible.
if(!is.null(cl)) {
  res <- parallel::parLapplyLB(cl,repIDs,eworker)
} else {
  # No cluster available: evaluate serially, as findSigma does for a NULL cl
  res <- lapply(repIDs,eworker)
}
res <- do.call(rbind,res)

# Distribution of holdout RMSE, one density per model
# NOTE(review): the title mentions only the noised model although every
# model in res is drawn -- confirm the intended wording.
print(ggplot(data=res,aes(x=testRMSE,color=what)) +
        geom_density(adjust=0.5,trim=TRUE) +
        ggtitle(paste(eName,'test RMSE, noised model',sep='\n')))

# Summary and standard deviation of holdout RMSE for each model
for(w in sort(unique(res$what))) {
  print("********")
  print(w)
  ri <- res[res$what==w,]
  print(summary(ri$testRMSE))
  print(sd(ri$testRMSE))
  print("********")
}

# Pair the noised and jackknifed results of each replicate on one row.
# String literals are single-quoted (standard SQL); SQLite treats
# double-quoted tokens as identifiers first and only falls back to strings.
rm <- sqldf("
  SELECT
    rJ.repID,
    rN.testRMSE - rJ.testRMSE AS NrmseMinusJrmse,
    rN.testRMSE AS nTestRMSE,
    rJ.testRMSE AS jTestRMSE,
    rN.bSigmaBest
  FROM
    res rJ
  JOIN
    res rN
  ON
    rJ.repID = rN.repID
  WHERE
    rJ.what = 'JackknifeModel' AND
    rN.what = 'NoisedModel'
")

# Per-replicate difference in holdout RMSE (noised minus jackknifed)
print(ggplot(data=rm,aes(x=NrmseMinusJrmse)) +
        geom_density(adjust=0.5,trim=TRUE) +
        ggtitle(paste(eName,'noise test RMSE minus jackknife test RMSE',
                      sep='\n')))

# Distribution of the sigma selected across replicates
print(ggplot(data=res[res$what=='NoisedModel',],aes(x=bSigmaBest)) +
        geom_density(adjust=0.5) +
        ggtitle(eName))

# Replicate-by-replicate comparison against the y = x reference line
print(ggplot(data=rm,aes(x=nTestRMSE,y=jTestRMSE)) +
        geom_point() +
        geom_abline(slope=1,intercept=0) +
        coord_fixed() +
        ggtitle(paste(eName,
                      'noised model performance versus jackknifed model performance',
                      sep='\n')))

# Relation between the selected sigma and the RMSE difference / noised RMSE
print(WVPlots::ScatterHist(rm,'bSigmaBest','NrmseMinusJrmse',
                           paste(eName,
                                 'sigma selected versus delta performance',
                                 sep='\n'),
                           smoothmethod='lm',annot_size=2))
print(WVPlots::ScatterHist(rm,'bSigmaBest','nTestRMSE',
                           paste(eName,
                                 'sigma selected versus performance',
                                 sep='\n'),
                           smoothmethod='lm',annot_size=2))
print("*************************************************************")
}
## [1] "*************************************************************"
## [1] "one variable, linear regression"
## [1] "bSigmaBest 1"
## [1] "naive effects model"
##
## Call:
## lm(formula = formulaL, data = trainData)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.3721 -0.6891 -0.0037 0.6848 3.7826
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.20623 0.02260 9.125 <2e-16 ***
## x1 1.00000 0.03685 27.137 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.011 on 1998 degrees of freedom
## Multiple R-squared: 0.2693, Adjusted R-squared: 0.269
## F-statistic: 736.4 on 1 and 1998 DF, p-value: < 2.2e-16
##
## [1] "train rmse 1.01025938596012"
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## TableGrob (3 x 2) "arrange": 5 grobs
## z cells name grob
## 1 1 (2-2,1-1) arrange gtable[layout]
## 2 2 (2-2,2-2) arrange gtable[layout]
## 3 3 (3-3,1-1) arrange gtable[layout]
## 4 4 (3-3,2-2) arrange gtable[layout]
## 5 5 (1-1,1-2) arrange text[GRID.text.140]
## [1] "test rmse 0.999915402747535"
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## TableGrob (3 x 2) "arrange": 5 grobs
## z cells name grob
## 1 1 (2-2,1-1) arrange gtable[layout]
## 2 2 (2-2,2-2) arrange gtable[layout]
## 3 3 (3-3,1-1) arrange gtable[layout]
## 4 4 (3-3,2-2) arrange gtable[layout]
## 5 5 (1-1,1-2) arrange text[GRID.text.293]
## [1] "effects model, sigma= 1"
##
## Call:
## lm(formula = formulaL, data = trainData)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.3750 -0.6883 -0.0014 0.6870 3.7847
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.20624 0.02260 9.125 <2e-16 ***
## x1 1.00213 0.03693 27.135 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.011 on 1998 degrees of freedom
## Multiple R-squared: 0.2693, Adjusted R-squared: 0.2689
## F-statistic: 736.3 on 1 and 1998 DF, p-value: < 2.2e-16
##
## [1] "train rmse 1.01028030952866"
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## TableGrob (3 x 2) "arrange": 5 grobs
## z cells name grob
## 1 1 (2-2,1-1) arrange gtable[layout]
## 2 2 (2-2,2-2) arrange gtable[layout]
## 3 3 (3-3,1-1) arrange gtable[layout]
## 4 4 (3-3,2-2) arrange gtable[layout]
## 5 5 (1-1,1-2) arrange text[GRID.text.446]
## [1] "test rmse 1.00016906466239"
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## TableGrob (3 x 2) "arrange": 5 grobs
## z cells name grob
## 1 1 (2-2,1-1) arrange gtable[layout]
## 2 2 (2-2,2-2) arrange gtable[layout]
## 3 3 (3-3,1-1) arrange gtable[layout]
## 4 4 (3-3,2-2) arrange gtable[layout]
## 5 5 (1-1,1-2) arrange text[GRID.text.599]
## [1] "effects model, jacknifed"
##
## Call:
## lm(formula = formulaL, data = trainData)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.3933 -0.6946 -0.0039 0.6875 3.7985
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.2062 0.0227 9.084 <2e-16 ***
## x1 0.9871 0.0370 26.682 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.015 on 1998 degrees of freedom
## Multiple R-squared: 0.2627, Adjusted R-squared: 0.2623
## F-statistic: 712 on 1 and 1998 DF, p-value: < 2.2e-16
##
## [1] "train rmse 1.01481235978284"
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## TableGrob (3 x 2) "arrange": 5 grobs
## z cells name grob
## 1 1 (2-2,1-1) arrange gtable[layout]
## 2 2 (2-2,2-2) arrange gtable[layout]
## 3 3 (3-3,1-1) arrange gtable[layout]
## 4 4 (3-3,2-2) arrange gtable[layout]
## 5 5 (1-1,1-2) arrange text[GRID.text.752]
## [1] "test rmse 1.00008428967326"
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).

## TableGrob (3 x 2) "arrange": 5 grobs
## z cells name grob
## 1 1 (2-2,1-1) arrange gtable[layout]
## 2 2 (2-2,2-2) arrange gtable[layout]
## 3 3 (3-3,1-1) arrange gtable[layout]
## 4 4 (3-3,2-2) arrange gtable[layout]
## 5 5 (1-1,1-2) arrange text[GRID.text.905]

## [1] "********"
## [1] "JackknifeModel"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.9848 0.9969 1.0020 1.0020 1.0070 1.0190
## [1] 0.007180245
## [1] "********"
## [1] "********"
## [1] "NaiveModel"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.9846 0.9968 1.0020 1.0020 1.0070 1.0190
## [1] 0.007190209
## [1] "********"
## [1] "********"
## [1] "NoisedModel"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.9848 0.9971 1.0020 1.0020 1.0070 1.0240
## [1] 0.007258813
## [1] "********"


## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 2 rows containing missing values (geom_bar).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## TableGrob (3 x 2) "arrange": 5 grobs
## z cells name grob
## 1 1 (2-2,1-1) arrange gtable[layout]
## 2 2 (2-2,2-2) arrange gtable[layout]
## 3 3 (3-3,1-1) arrange gtable[layout]
## 4 4 (3-3,2-2) arrange gtable[layout]
## 5 5 (1-1,1-2) arrange text[GRID.text.1266]
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 2 rows containing missing values (geom_bar).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## TableGrob (3 x 2) "arrange": 5 grobs
## z cells name grob
## 1 1 (2-2,1-1) arrange gtable[layout]
## 2 2 (2-2,2-2) arrange gtable[layout]
## 3 3 (3-3,1-1) arrange gtable[layout]
## 4 4 (3-3,2-2) arrange gtable[layout]
## 5 5 (1-1,1-2) arrange text[GRID.text.1419]
## [1] "*************************************************************"
## [1] "*************************************************************"
## [1] "one variable plus noise variable, linear regression"
## [1] "bSigmaBest 8"
## [1] "naive effects model"
##
## Call:
## lm(formula = formulaL, data = trainData)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.9216 -0.6181 0.0055 0.6225 3.5298
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.20622 0.02058 10.02 <2e-16 ***
## x1 0.83459 0.03452 24.17 <2e-16 ***
## n1 0.78131 0.03844 20.33 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9203 on 1997 degrees of freedom
## Multiple R-squared: 0.3946, Adjusted R-squared: 0.394
## F-statistic: 650.8 on 2 and 1997 DF, p-value: < 2.2e-16
##
## [1] "train rmse 0.919591353886876"
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## TableGrob (3 x 2) "arrange": 5 grobs
## z cells name grob
## 1 1 (2-2,1-1) arrange gtable[layout]
## 2 2 (2-2,2-2) arrange gtable[layout]
## 3 3 (3-3,1-1) arrange gtable[layout]
## 4 4 (3-3,2-2) arrange gtable[layout]
## 5 5 (1-1,1-2) arrange text[GRID.text.1572]
## [1] "test rmse 1.12246743812363"
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## TableGrob (3 x 2) "arrange": 5 grobs
## z cells name grob
## 1 1 (2-2,1-1) arrange gtable[layout]
## 2 2 (2-2,2-2) arrange gtable[layout]
## 3 3 (3-3,1-1) arrange gtable[layout]
## 4 4 (3-3,2-2) arrange gtable[layout]
## 5 5 (1-1,1-2) arrange text[GRID.text.1725]
## [1] "effects model, sigma= 8"
##
## Call:
## lm(formula = formulaL, data = trainData)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.4002 -0.6792 -0.0085 0.6804 3.6877
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.207817 0.022622 9.186 < 2e-16 ***
## x1 1.001711 0.036979 27.088 < 2e-16 ***
## n1 0.011005 0.003316 3.319 0.00092 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.009 on 1997 degrees of freedom
## Multiple R-squared: 0.2725, Adjusted R-squared: 0.2718
## F-statistic: 374.1 on 2 and 1997 DF, p-value: < 2.2e-16
##
## [1] "train rmse 1.00803892027929"
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## TableGrob (3 x 2) "arrange": 5 grobs
## z cells name grob
## 1 1 (2-2,1-1) arrange gtable[layout]
## 2 2 (2-2,2-2) arrange gtable[layout]
## 3 3 (3-3,1-1) arrange gtable[layout]
## 4 4 (3-3,2-2) arrange gtable[layout]
## 5 5 (1-1,1-2) arrange text[GRID.text.1878]
## [1] "test rmse 1.01215427910968"
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## TableGrob (3 x 2) "arrange": 5 grobs
## z cells name grob
## 1 1 (2-2,1-1) arrange gtable[layout]
## 2 2 (2-2,2-2) arrange gtable[layout]
## 3 3 (3-3,1-1) arrange gtable[layout]
## 4 4 (3-3,2-2) arrange gtable[layout]
## 5 5 (1-1,1-2) arrange text[GRID.text.2031]
## [1] "effects model, jacknifed"
##
## Call:
## lm(formula = formulaL, data = trainData)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.3986 -0.6920 -0.0077 0.6877 3.8126
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.20643 0.02268 9.101 <2e-16 ***
## x1 0.98425 0.03698 26.614 <2e-16 ***
## n1 -0.07739 0.03479 -2.224 0.0262 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.014 on 1997 degrees of freedom
## Multiple R-squared: 0.2645, Adjusted R-squared: 0.2638
## F-statistic: 359.2 on 2 and 1997 DF, p-value: < 2.2e-16
##
## [1] "train rmse 1.01355772650768"
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## TableGrob (3 x 2) "arrange": 5 grobs
## z cells name grob
## 1 1 (2-2,1-1) arrange gtable[layout]
## 2 2 (2-2,2-2) arrange gtable[layout]
## 3 3 (3-3,1-1) arrange gtable[layout]
## 4 4 (3-3,2-2) arrange gtable[layout]
## 5 5 (1-1,1-2) arrange text[GRID.text.2184]
## [1] "test rmse 1.00913108707443"
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).

## TableGrob (3 x 2) "arrange": 5 grobs
## z cells name grob
## 1 1 (2-2,1-1) arrange gtable[layout]
## 2 2 (2-2,2-2) arrange gtable[layout]
## 3 3 (3-3,1-1) arrange gtable[layout]
## 4 4 (3-3,2-2) arrange gtable[layout]
## 5 5 (1-1,1-2) arrange text[GRID.text.2337]

## [1] "********"
## [1] "JackknifeModel"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.9851 0.9982 1.0030 1.0030 1.0080 1.0210
## [1] 0.006852836
## [1] "********"
## [1] "********"
## [1] "NaiveModel"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.096 1.126 1.134 1.135 1.143 1.172
## [1] 0.01407833
## [1] "********"
## [1] "********"
## [1] "NoisedModel"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.9888 1.0030 1.0080 1.0080 1.0120 1.0450
## [1] 0.008048595
## [1] "********"


## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## TableGrob (3 x 2) "arrange": 5 grobs
## z cells name grob
## 1 1 (2-2,1-1) arrange gtable[layout]
## 2 2 (2-2,2-2) arrange gtable[layout]
## 3 3 (3-3,1-1) arrange gtable[layout]
## 4 4 (3-3,2-2) arrange gtable[layout]
## 5 5 (1-1,1-2) arrange text[GRID.text.2698]
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## TableGrob (3 x 2) "arrange": 5 grobs
## z cells name grob
## 1 1 (2-2,1-1) arrange gtable[layout]
## 2 2 (2-2,2-2) arrange gtable[layout]
## 3 3 (3-3,1-1) arrange gtable[layout]
## 4 4 (3-3,2-2) arrange gtable[layout]
## 5 5 (1-1,1-2) arrange text[GRID.text.2851]
## [1] "*************************************************************"
## [1] "*************************************************************"
## [1] "one variable, diagonal regression"
## [1] "bSigmaBest 13"
## [1] "naive effects model"
## x1
## 1.000005
## [1] "train rmse 1.03109338373284"
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## TableGrob (3 x 2) "arrange": 5 grobs
## z cells name grob
## 1 1 (2-2,1-1) arrange gtable[layout]
## 2 2 (2-2,2-2) arrange gtable[layout]
## 3 3 (3-3,1-1) arrange gtable[layout]
## 4 4 (3-3,2-2) arrange gtable[layout]
## 5 5 (1-1,1-2) arrange text[GRID.text.3004]
## [1] "test rmse 1.02233899515915"
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## TableGrob (3 x 2) "arrange": 5 grobs
## z cells name grob
## 1 1 (2-2,1-1) arrange gtable[layout]
## 2 2 (2-2,2-2) arrange gtable[layout]
## 3 3 (3-3,1-1) arrange gtable[layout]
## 4 4 (3-3,2-2) arrange gtable[layout]
## 5 5 (1-1,1-2) arrange text[GRID.text.3157]
## [1] "effects model, sigma= 13"
## x1
## 1.007208
## [1] "train rmse 1.03387579088062"
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## TableGrob (3 x 2) "arrange": 5 grobs
## z cells name grob
## 1 1 (2-2,1-1) arrange gtable[layout]
## 2 2 (2-2,2-2) arrange gtable[layout]
## 3 3 (3-3,1-1) arrange gtable[layout]
## 4 4 (3-3,2-2) arrange gtable[layout]
## 5 5 (1-1,1-2) arrange text[GRID.text.3310]
## [1] "test rmse 1.02796779024538"
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## TableGrob (3 x 2) "arrange": 5 grobs
## z cells name grob
## 1 1 (2-2,1-1) arrange gtable[layout]
## 2 2 (2-2,2-2) arrange gtable[layout]
## 3 3 (3-3,1-1) arrange gtable[layout]
## 4 4 (3-3,2-2) arrange gtable[layout]
## 5 5 (1-1,1-2) arrange text[GRID.text.3463]
## [1] "effects model, jacknifed"
## x1
## 0.9871528
## [1] "train rmse 1.03555476179036"
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## TableGrob (3 x 2) "arrange": 5 grobs
## z cells name grob
## 1 1 (2-2,1-1) arrange gtable[layout]
## 2 2 (2-2,2-2) arrange gtable[layout]
## 3 3 (3-3,1-1) arrange gtable[layout]
## 4 4 (3-3,2-2) arrange gtable[layout]
## 5 5 (1-1,1-2) arrange text[GRID.text.3616]
## [1] "test rmse 1.02246501285093"
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).

## TableGrob (3 x 2) "arrange": 5 grobs
## z cells name grob
## 1 1 (2-2,1-1) arrange gtable[layout]
## 2 2 (2-2,2-2) arrange gtable[layout]
## 3 3 (3-3,1-1) arrange gtable[layout]
## 4 4 (3-3,2-2) arrange gtable[layout]
## 5 5 (1-1,1-2) arrange text[GRID.text.3769]

## [1] "********"
## [1] "JackknifeModel"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.002 1.015 1.020 1.020 1.025 1.038
## [1] 0.007704492
## [1] "********"
## [1] "********"
## [1] "NaiveModel"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.001 1.015 1.020 1.020 1.025 1.038
## [1] 0.007740295
## [1] "********"
## [1] "********"
## [1] "NoisedModel"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.001 1.017 1.022 1.023 1.029 1.053
## [1] 0.009453189
## [1] "********"


## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 2 rows containing missing values (geom_bar).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## TableGrob (3 x 2) "arrange": 5 grobs
## z cells name grob
## 1 1 (2-2,1-1) arrange gtable[layout]
## 2 2 (2-2,2-2) arrange gtable[layout]
## 3 3 (3-3,1-1) arrange gtable[layout]
## 4 4 (3-3,2-2) arrange gtable[layout]
## 5 5 (1-1,1-2) arrange text[GRID.text.4130]
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 2 rows containing missing values (geom_bar).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## TableGrob (3 x 2) "arrange": 5 grobs
## z cells name grob
## 1 1 (2-2,1-1) arrange gtable[layout]
## 2 2 (2-2,2-2) arrange gtable[layout]
## 3 3 (3-3,1-1) arrange gtable[layout]
## 4 4 (3-3,2-2) arrange gtable[layout]
## 5 5 (1-1,1-2) arrange text[GRID.text.4283]
## [1] "*************************************************************"
## [1] "*************************************************************"
## [1] "one variable plus noise variable, diagonal regression"
## [1] "bSigmaBest 19"
## [1] "naive effects model"
## x1 n1
## 1.000005 1.000333
## [1] "train rmse 0.958540237968956"
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## TableGrob (3 x 2) "arrange": 5 grobs
## z cells name grob
## 1 1 (2-2,1-1) arrange gtable[layout]
## 2 2 (2-2,2-2) arrange gtable[layout]
## 3 3 (3-3,1-1) arrange gtable[layout]
## 4 4 (3-3,2-2) arrange gtable[layout]
## 5 5 (1-1,1-2) arrange text[GRID.text.4436]
## [1] "test rmse 1.20618715828122"
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## TableGrob (3 x 2) "arrange": 5 grobs
## z cells name grob
## 1 1 (2-2,1-1) arrange gtable[layout]
## 2 2 (2-2,2-2) arrange gtable[layout]
## 3 3 (3-3,1-1) arrange gtable[layout]
## 4 4 (3-3,2-2) arrange gtable[layout]
## 5 5 (1-1,1-2) arrange text[GRID.text.4589]
## [1] "effects model, sigma= 19"
## x1 n1
## 0.988586923 0.003381102
## [1] "train rmse 1.03609580082898"
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## TableGrob (3 x 2) "arrange": 5 grobs
## z cells name grob
## 1 1 (2-2,1-1) arrange gtable[layout]
## 2 2 (2-2,2-2) arrange gtable[layout]
## 3 3 (3-3,1-1) arrange gtable[layout]
## 4 4 (3-3,2-2) arrange gtable[layout]
## 5 5 (1-1,1-2) arrange text[GRID.text.4742]
## [1] "test rmse 1.0389153388548"
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## TableGrob (3 x 2) "arrange": 5 grobs
## z cells name grob
## 1 1 (2-2,1-1) arrange gtable[layout]
## 2 2 (2-2,2-2) arrange gtable[layout]
## 3 3 (3-3,1-1) arrange gtable[layout]
## 4 4 (3-3,2-2) arrange gtable[layout]
## 5 5 (1-1,1-2) arrange text[GRID.text.4895]
## [1] "effects model, jacknifed"
## x1 n1
## 0.9871528 -0.1088369
## [1] "train rmse 1.03458802692346"
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## TableGrob (3 x 2) "arrange": 5 grobs
## z cells name grob
## 1 1 (2-2,1-1) arrange gtable[layout]
## 2 2 (2-2,2-2) arrange gtable[layout]
## 3 3 (3-3,1-1) arrange gtable[layout]
## 4 4 (3-3,2-2) arrange gtable[layout]
## 5 5 (1-1,1-2) arrange text[GRID.text.5048]
## [1] "test rmse 1.03176880530955"
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).

## TableGrob (3 x 2) "arrange": 5 grobs
## z cells name grob
## 1 1 (2-2,1-1) arrange gtable[layout]
## 2 2 (2-2,2-2) arrange gtable[layout]
## 3 3 (3-3,1-1) arrange gtable[layout]
## 4 4 (3-3,2-2) arrange gtable[layout]
## 5 5 (1-1,1-2) arrange text[GRID.text.5201]

## [1] "********"
## [1] "JackknifeModel"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.001 1.016 1.022 1.022 1.027 1.044
## [1] 0.007913222
## [1] "********"
## [1] "********"
## [1] "NaiveModel"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.178 1.208 1.219 1.220 1.232 1.276
## [1] 0.01667267
## [1] "********"
## [1] "********"
## [1] "NoisedModel"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.004 1.022 1.027 1.030 1.034 1.145
## [1] 0.01595532
## [1] "********"


## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 2 rows containing missing values (geom_bar).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).
## TableGrob (3 x 2) "arrange": 5 grobs
## z cells name grob
## 1 1 (2-2,1-1) arrange gtable[layout]
## 2 2 (2-2,2-2) arrange gtable[layout]
## 3 3 (3-3,1-1) arrange gtable[layout]
## 4 4 (3-3,2-2) arrange gtable[layout]
## 5 5 (1-1,1-2) arrange text[GRID.text.5562]
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 2 rows containing missing values (geom_bar).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 3 rows containing missing values (geom_bar).

## TableGrob (3 x 2) "arrange": 5 grobs
## z cells name grob
## 1 1 (2-2,1-1) arrange gtable[layout]
## 2 2 (2-2,2-2) arrange gtable[layout]
## 3 3 (3-3,1-1) arrange gtable[layout]
## 4 4 (3-3,2-2) arrange gtable[layout]
## 5 5 (1-1,1-2) arrange text[GRID.text.5715]
## [1] "*************************************************************"
# Shut down the worker cluster if one is still open, then clear the handle so
# this cleanup is a no-op when run again.
if (!is.null(cl)) {
  parallel::stopCluster(cl)
  cl <- NULL
}